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In [1]: 


import pandas as pd 


In [2]: 


df = pd.read csv('digit data.csv') 


In [3]: 


df.head() 


Out[3]: 


label pixel0  pixel1 
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5 rows x 785 columns 


In [4]: 

df.tail() 

Out[4]: 

label 

41995 0 0 0 
41996 1 0 0 
41997 7 0 0 
41998 6 0 0 
41999 9 0 0 


5 rows x 785 columns 


In [5]: 


df.shape 


Out[5]: 


(42000, 785) 
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In [6]: 


df.columns 


Out[6]: 
Index(['label', 'pixelO', 'pixel1', 'pixel2', 'pixel3', 'pixel4', "pixel 
to 'pixel6', 'pixel7', 'pixel8', 
'pixel774', 'pixel775', 'pixel776', 'pixel777', 'pixel778', 'pixel7 
idi 'pixel780', 'pixel781', 'pixel782', 'pixel783'], 
dtype-'object', length=785) 
In [7]: 


df.duplicated().sum() 


Out[7]: 


0 


In [8]: 


df.isnull().sum() 


Out[8]: 


label 

pixel@ 
pixel1 
pixel2 
pixel3 


SO OVO 


pixel779 
pixel780 
pixel781 
pixel782 
pixel783 
Length: 785, dtype: int64 


SO O OO - 


In [9]: 


df.info() 


<class 'pandas.core.frame.DataFrame'> 
RangeIndex: 42000 entries, 0 to 41999 
Columns: 785 entries, label to pixel783 
dtypes: int64(785) 

memory usage: 251.5 MB 
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In [10]: 


df.describe() 


Out[10]: 


label 


count 42000.000000 


mean 
std 
min 
25% 
50% 
75% 


max 


8 rows x 785 columns 


In [11]: 


4.456643 
2.887730 
0.000000 
2.000000 
4.000000 
7.000000 
9.000000 


df.nunique() 


out[11]: 


label 

pixel@ 
pixel1 
pixel2 
pixel3 


pixel779 
pixel780 
pixel781 
pixel782 
pixel783 


Length: 785, dtype: int64 


In [12]: 


10 
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0.0 
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0.0 
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0.0 

0.0 


pixel1 
42000.0 
0.0 

0.0 

0.0 

0.0 

0.0 

0.0 

0.0 


import matplotlib.pyplot as plt 
import seaborn as sns 


In [13]: 


import numpy as np 
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In [14]: 


import warnings 
warnings.filterwarnings(' ignore") 


In [15]: 


df['label'].unique() 


Out[15]: 


array([1, 0, 4, 7, 3, 5, 8, 9, 2, 6], dtype-int64) 


In [16]: 


df['label'].value counts() 


Out [16]: 


4684 
4401 
4351 
4188 
4177 
4137 
4132 
4072 
4063 
3795 
Name: label, dtype: int64 


Wo BOANUOWNH 


In [17]: 


plt.figure(figsize=(15,6)) 
sns.countplot(df['label'], data = df, palette = 'hls') 
plt.show() 
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count 
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In [18]: 


plt.figure(figsize=(30,20)) 

plt.pie(df['label'].value counts(), labels-df['label'].value counts().index, autopct='%1 
'color': 'black', 
'weight': 'bold', 
'family': 'serif' 3) 

hfont = ('fontname':'serif', 'weight': 'bold'j 

plt.title('label', size-20, **hfont) 

plt.show() 

4 K 


label 
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In [20]: 


import plotly.express as px 


fig = px.bar(df, x="label", y= df.index, color = ‘label') 
fig.show() 


100M 


80M 


index 
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In [21]: 


value counts = df['label'].value counts() 
fig = px.pie(names-value counts.index, values-value counts.values) 
fig.update layout( 
title-'Pie Chart of Label’, 
title x=0.5 
) 
fig.show() 


Pie Chart of Label 


In [22]: 


from sklearn.model selection import train test split 

from sklearn.linear model import LogisticRegression 

from sklearn.tree import DecisionTreeClassifier 

from sklearn.metrics import accuracy score,confusion matrix,classification report 


In [23]: 


x 
iT] 


df.drop(["label"],axis-1) 
df['label'] 


< 
iT] 
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In [24]: 


first image = X.iloc[0] 
first image = first image.to numpy().reshape(28,28) 


In [26]: 


plt.imshow(first image, cmap='binary') 
plt.axis("off") 
plt.show() 


In [27]: 


X train, X test, y train, y test - train test split(X,y,test size - 0.2, random state - 


In [28]: 


lrc - LogisticRegression(multi class - "multinomial") 
lrc.fit(X train,y train) 


Out [28]: 


iy LogisticRegression 


iLogisticRegression(mul i class="multinomial') 


In [29]: 


some digit = X test.iloc[[0]] 
some digit pred = lrc.predict(some digit) 
some digit pred 


Out [29]: 


array([8], dtype=int64) 
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In [30]: 


y test.iloc[[0]] 


Out[30]: 


5457 8 
Name: label, dtype: int64 


In [31]: 


y_pred = 1rc.predict(X_test) 


In [36]: 


print(accuracy score(y test, y_pred)) 

confusion = confusion matrix(y test, y pred) 
plt.figure(figsize=(15,6)) 

sns.heatmap(confusion, annot-True, cmap="RdBu r") 
plt.xlabel('Predicted') 

plt.ylabel('True') 

plt.show() 


0.9183333333333333 


Predicted 


localhost:8888/notebooks/digit.ipynb 9/13 


4/12/23, 9:57 PM 


In [37]: 


print(classification report(y test,y pred)) 


precision 

Q 0.96 

1 0.96 

2 0.92 

3 0.90 

4 0.93 

5 0.87 

6 0.93 

7 0.93 

8 0.88 

9 0.90 
accuracy 

macro avg 0.92 

weighted avg 0.92 


In [38]: 
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dtc = DecisionTreeClassifier(random state = 42) 


dtc.fit(X train,y train) 


Out [38]: 


iy DecisionTreeClassifier 


iDecisionTreeClassif er(random state-42)| 


In [39]: 


dtc pred = dtc.predict(X test) 


localhost:8888/notebooks/digit.ipynb 


10/13 


4/12/23, 9:57 PM 


In [40]: 


print(accuracy score(y test,dtc pred)) 
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confusion = confusion matrix(dtc pred, y pred) 


plt.figure(figsize=(15,6)) 


sns.heatmap(confusion, annot=True, fmt-'d') 


plt.xlabel('Predicted') 
plt.ylabel('True') 
plt.show() 


In [41]; 


Predicted 


from sklearn.metrics import classification report 


print(classification report(y test,dtc pred)) 
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In [42]: 


from sklearn.ensemble import RandomForestClassifier 
rfc = RandomForestClassifier(random state = 42) 
rfc.fit(X train,y train) 


Out[42]: 


iv 


In [43]: 


rfc pred = rfc.predict(X test) 


In [44]: 


print(accuracy score(y test,dtc pred)) 
confusion = confusion matrix(rfc pred, y pred) 
plt.figure(figsize=(15,6)) 
sns.heatmap(confusion, annot=True, fmt-'d') 
plt.xlabel('Predicted') 

plt.ylabel('True') 

plt.show() 


0.8525 


Predicted 
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In [45]: 
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from sklearn.metrics import classification report 


print(classification report(y test,rfc pred)) 


precision 

Q 0.98 

1 0.98 

2 0.96 

3 0.96 

4 0.96 

5 0.96 

6 0.96 

7 0.97 

8 0.95 

9 0.93 
accuracy 

macro avg 0.96 

weighted avg 0.96 
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